home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Language/OS - Multiplatform Resource Library
/
LANGUAGE OS.iso
/
oper_sys
/
quartz
/
quartz10.lha
/
src
/
runtime
/
profile.c
< prev
next >
Wrap
C/C++ Source or Header
|
1990-05-18
|
12KB
|
558 lines
#include <stdio.h>
#include "thread.h"
#include "synch.h"
#include <usclkc.h>
#include "quartzcommon.h"
#include "profile.h"
#include "internal.h"
/* Default sizes for the profiler's dynamically refilled pools. */
#ifndef InitOverflowSize
#define InitOverflowSize 5000
#endif
#define InitNumObjs 10      /* SynchSamples records allocated per refill */
#define InitNumKids 10      /* ChildData records allocated per refill */

/* These get set by munge */
private int mNumProcIds = 0;
private int mEndOfText = 0;

/* communication from the computation -> sampling processors */
private int profileOn = FALSE;
shared int effectiveParallelism = 0;
shared int nominalParallelism = 0;
shared int profileOver = FALSE;
shared Processor processorList[NUMPROCS];

/* Sampled data -- output to a file */
shared FLOAT timeDiff = 0;
shared int numSamples = 0;
shared ConcurrentData *procData;
shared GraphEntry *pcTable; /* needed by mcount */
shared int pcTableSize;

/* Needed internally to control profiling */
static shared SpinLock *pcTableLocks;
static shared GraphEntry overflowFirst[InitOverflowSize];
static shared GraphEntry *overflow;     /* current pool of spare GraphEntrys */
static shared SpinLock overflowLock;
static shared int overflowSize = 0;     /* entries left in the overflow pool */
static shared int overflowOccurred = 0;
static shared SynchSamples *objData;    /* pool handed out by GetSampleSpace */
static shared int objNum = 0;
static shared SpinLock objLock;
static shared ChildData *kidData;       /* pool handed out by GetChildData */
static shared int kidNum = 0;
static shared SpinLock kidLock;
private int iteration = 1;              /* stamp: one tick per sampling pass */
private unsigned int tmpStack[InitIdStackSize];  /* per-sampler stack snapshot */
private int myHit;                      /* this sampler's index into hit[] stamps */
static shared int start = FALSE;
static shared usclk_t startTime;
/* BUG FIX: was "static shared endCount;" -- implicit int, inconsistent with
 * every other counter here and illegal in C99 and later. */
static shared int endCount;             /* samplers yet to finish; see ProfileExternal */
static shared SpinLock startLock;

void OutOfRoom();
/* Initialization routines */
/* Init profiling data structures */
/* Install o (an array of InitOverflowSize GraphEntrys) as the new
 * overflow pool, initializing its contents before publishing it. */
static void OverflowSetup (o)
GraphEntry *o;
{
    GraphTableInit(o, InitOverflowSize);
    overflowOccurred = 0;
    overflowSize = InitOverflowSize;
    overflow = o;
}
/* Set up per-processor bookkeeping: give every processor an idle thread
 * whose profiling ID stack holds a single StartID entry.  Processor 0
 * starts in the Busy state; all others start Spinning. */
void ProcessorListInit ()
{
    int i;
    Thread *t;
    Processor *p;

    for (i = 0; i < NUMPROCS; i++)
    {
        p = &processorList[i];
        t = &p->idleThread;
        t->type = ThreadType;
        t->idStack.base = &p->idStack[0];
        /* bottom-of-stack entry: procedure ID tagged with initial state bits */
        if (i == 0)
            t->idStack.base->procID = StartID | BusyState;
        else
            t->idStack.base->procID = StartID | SpinState;
        t->idStack.top = t->idStack.base;
        /* leave one slot of slack so the overflow check in TPushOnIdStack
         * can run before the slot past the limit would be written */
        t->idStack.limit = t->idStack.base + InitIdStackSize - 1;
        p->curThread = t;
        p->synchList = NULL;        /* no synch objects registered yet */
        SLNPInit(&p->profLock);     /* guards sampling of this processor */
        p->numSamples = 0;
    }
}
/* External entry point to initialize external data structures */
/* External entry point to initialize profiling data structures.
 * numProfilers: number of sampling processors that will check in;
 * recorded in endCount so the last one to finish can dump the data. */
void ProfileInit (numProfilers)
int numProfilers;
{
    int i;

    /* munge patches mEndOfText into the object file; without it we
     * cannot size the pc table. */
    if (mEndOfText == 0)
    {
        fprintf(stderr, "Unable to profile: munge not run on object\n");
        /* BUG FIX: was exit(0) -- a fatal error must not report success */
        exit(1);
    }
    /* one GraphEntry slot per GraphEntry-sized chunk of program text */
    pcTableSize = (mEndOfText + sizeof(GraphEntry)) / sizeof(GraphEntry);
    pcTable = MyShmalloc(GraphEntry, pcTableSize);
    GraphTableInit(pcTable, pcTableSize);
    pcTableLocks = MyShmalloc(SpinLock, pcTableSize);
    for (i = 0; i < pcTableSize; i++)
        SLNPInit(&pcTableLocks[i]);
    OverflowSetup(overflowFirst);
    SLNPInit(&overflowLock);
    procData = MyShmalloc(ConcurrentData, mNumProcIds);
    ConDataTableInit(procData, mNumProcIds);
    SLNPInit(&objLock);
    SLNPInit(&kidLock);
    endCount = numProfilers;
    SLNPInit(&startLock);
    effectiveParallelism = nominalParallelism = 1;
}
void ProfileSetAllBusy ()
{
int i;
for (i = 0; i < numProcessors; i++)
{
processorList[i].idleThread.idStack.top++;
processorList[i].idleThread.idStack.top->procID = ForkID | BusyState;
}
effectiveParallelism = nominalParallelism = numProcessors;
}
/* Enable recording of call-graph arcs (see mcount / ProfileMustAdd). */
void SetProfileOn ()
{
    profileOn = TRUE;
}
/* Disable recording of call-graph arcs. */
void SetProfileOff ()
{
    profileOn = FALSE;
}
/* Runtime profiling routines, for normal processors (eg, mcount) */
/* does the same thing as mcount, but ignore recursion */
/* Does the same thing as mcount, but ignores recursion: record the call
 * arc from the ID on top of thread t's stack to synch object p, then
 * push p (tagged with state bits s) as the new top entry.
 * t: thread whose ID stack is updated; p: synch object being entered;
 * s: state bits to tag the new stack entry with. */
void TPushOnIdStack (t, p, s)
register Thread *t;
register SynchProfile *p;
unsigned int s;
{
    register IdStackEntry *ePtr;
    unsigned int callerID;

    ASSERT(t->type == ThreadType);
    ASSERT(p->type == SynchProfileType);
    ePtr = t->idStack.top;
    /* mark the top entry as profiler overhead while we work, so a
     * concurrent sampler charges this time to overhead, not the caller */
    ePtr->procID |= OverheadState;
    callerID = ePtr->procID & AllOffMask;   /* strip state bits -> caller's ID */
    /* fast path: same caller as last time, just bump the arc count */
    if (p->g.callerID == callerID)
        AtomicIncrP(&(p->g.num));
    else
        ProfileMustAdd((unsigned int)p, callerID, &p->g);
    if (ePtr >= t->idStack.limit)
        OutOfRoom(); /* die */
    /* clear the slot above before publishing the new top */
    (ePtr + 1)->procID = NoID;
    ePtr->procID &= OverheadOffMask;        /* done fiddling: clear overhead bit */
    t->idStack.top = ++ePtr;
    /* new entry = state bits | synch object's address, used as its ID */
    ePtr->procID = (s) | (int)p;
}
/* Like TPushOnIdStack for the current thread, but the top ID-stack
 * entry is replaced instead of pushed on: record the call arc from the
 * current caller to synch object p, then overwrite the top entry with
 * p tagged with state bits s. */
void CallAndReplaceOnIdStack (p, s)
register SynchProfile *p;
unsigned int s;
{
    register Thread *t = pP.thread;     /* pP: this processor's private state */
    register IdStackEntry *ePtr = t->idStack.top;
    unsigned int callerID;

    ASSERT(p->type == SynchProfileType);
    /* flag as profiler overhead while the arc is recorded */
    ePtr->procID |= OverheadState;
    callerID = ePtr->procID & AllOffMask;   /* strip the state bits */
    if (p->g.callerID == callerID)
        AtomicIncrP(&(p->g.num));           /* fast path: same caller as before */
    else
        ProfileMustAdd((unsigned int)p, callerID, &p->g);
    /* ReplaceOnIdStack (defined elsewhere) rewrites the top entry --
     * presumably clearing the overhead bit in the process; verify there */
    ReplaceOnIdStack(p,s);
}
/* nasty: how to make sure we're free from deadlock
* on overflow, provided the procedures we call don't overflow
* on interrupts
*/
/* Record one occurrence of the call arc callerID -> calleeID in the
 * chain of GraphEntry records starting at p, claiming a fresh record
 * from the shared overflow pool if the arc is not present yet.
 * profileOn is forced off around the locked section so an interrupt
 * cannot re-enter the profiler while the pc-table lock is held. */
void ProfileMustAdd (calleeID, callerID, p)
unsigned int calleeID;
unsigned int callerID;
GraphEntry *p;
{
    register GraphEntry *q;
    register SpinLock *l = NULL;
    register int old;

    /* walk the chain looking for an existing record of this arc */
    for (q = p; q->calleeID != calleeID || q->callerID != callerID; q = q->next)
    {
        while (q->next == NULL)     /* reached the tail without a match */
        {
            if (!l)
                l = id2lock(calleeID);
            old = profileOn;
            profileOn = FALSE; /* in case we get an interrupt */
            if (!SLNPTestAndGet(l))
            {
                /* lock busy: restore profiling and retry the tail test */
                profileOn = old;
                continue;
            }
            if (q->next != NULL)
            {
                /* someone extended the chain while we waited; resume search */
                SLNPRelease(l);
                profileOn = old;
                break;
            }
            if (q->num != 0) /* have to get one from overflow */
            {
                SLNPAcquire(&overflowLock);
                if (overflowSize == 0)
                    OverflowSetup(MyShmalloc(GraphEntry,InitOverflowSize));
                q->next = &overflow[--overflowSize];
                SLNPRelease(&overflowLock);
                q = q->next;
            }
            /* claim q (the free tail, or the entry just linked) for this arc */
            q->num = 1;
            q->calleeID = calleeID;
            q->callerID = callerID;
            SLNPRelease(l);
            profileOn = old;
            return;
        }
    }
    /* arc already recorded: bump its count atomically */
    AtomicIncrP(&(q->num));
}
/* Runtime sampling routines */
/* return t1 - t2 */
/* Return t1 - t2 as a FLOAT, where t1 was sampled after t2, allowing
 * for the clock having wrapped around once between the two readings.
 * NOTE(review): assumes usclk_t is a 32-bit counter that wraps at
 * 2^32 -- confirm against <usclkc.h>. */
static FLOAT ComputeDiff (t1, t2)
usclk_t t1, t2;
{
    usclk_t d;

    if (t1 < t2)
        /* wrapped: elapsed = (2^32 - t2) + t1.
         * BUG FIX: the old expression t1 + (0xffffffff - t2) was one
         * tick short of the true modulo-2^32 difference. */
        d = t1 + (0xffffffff - t2) + 1;
    else
        d = t1 - t2;
    return((FLOAT)d);
}
/* Clamp n to the inclusive range [lb, ub]. */
static int Bound (n, lb, ub)
int n, lb, ub;
{
    return((n < lb) ? lb : (n > ub) ? ub : n);
}
/* Snapshot thread t's ID stack into tmpStack (top-of-stack first) along
 * with the parallelism counters.  Returns the number of entries copied.
 * *eff gets the effective-parallelism index (clamped, 0-based);
 * *nom gets 0 if fewer threads are runnable than processors, else 1.
 * Bumps iteration so per-record hit[] stamps see a new sampling pass. */
static int SampleStack (t, eff, nom)
register Thread *t;
int *eff, *nom;
{
    int eP, nP;
    register IdStackEntry *e;
    register unsigned int *sp = tmpStack;

    /* Sampling begins */
    eP = effectiveParallelism;
    nP = nominalParallelism;
    /* copy from top down, so tmpStack[0] is the innermost entry */
    for (e = t->idStack.top; e >= t->idStack.base; e--, sp++)
        *sp = e->procID;
    /* Sampling ends */
    *eff = Bound(eP, 1, MaxEffectiveParallelism) - 1;
    *nom = (nP < numProcessors) ? 0 : 1;
    iteration++;
    return(sp - tmpStack);
}
/* Hand out one ChildData record from the shared pool, refilling the
 * pool from shared memory when it runs dry. */
ChildData *GetChildData ()
{
    ChildData *result;

    SLNPAcquire(&kidLock);
    kidNum--;
    if (kidNum < 0)
    {
        /* pool exhausted: allocate and initialize a fresh batch */
        kidData = MyShmalloc(ChildData, InitNumKids);
        ChildTableInit(kidData, InitNumKids);
        kidNum = InitNumKids - 1;
    }
    result = &kidData[kidNum];
    SLNPRelease(&kidLock);
    return(result);
}
/* Hand out one SynchSamples record from the shared pool, refilling the
 * pool from shared memory when it runs dry. */
SynchSamples *GetSampleSpace ()
{
    SynchSamples *result;

    SLNPAcquire(&objLock);
    objNum--;
    if (objNum < 0)
    {
        /* pool exhausted: allocate and initialize a fresh batch */
        objData = MyShmalloc(SynchSamples, InitNumObjs);
        SynchTableInit(objData, InitNumObjs);
        objNum = InitNumObjs - 1;
    }
    result = &objData[objNum];
    SLNPRelease(&objLock);
    return(result);
}
/* Charge diff units of busy time to record data, observed at effective
 * parallelism index eP and nominal bucket nP.  first is TRUE when data
 * is for the top-of-stack entry; otherwise callee identifies the child
 * through which the busy time flowed, and the charge also goes to the
 * matching ChildData record hung off data.  The hit/iteration stamp
 * ensures a record appearing more than once on a stack is charged only
 * once per sampling pass. */
static void AddSample (data, diff, eP, nP, first, callee)
ConcurrentData *data;
FLOAT diff;
int eP, nP, first;
unsigned int callee;
{
    ChildData *k;

    ASSERT(data->type == ConcurrentDataType && diff >= 0);
    ASSERT((eP >= 0 && eP < MaxEffectiveParallelism) && (nP == 0 || nP == 1));
    if (data->hit[myHit] < iteration)
    {
        data->hit[myHit] = iteration;
        SLNPAcquire(&data->lock);
        data->nom.byNomP[MePlusKids][BUSY][nP] += diff;
        if (first)
        {
            data->busy.byEffP[eP] += diff;
            data->nom.byNomP[JustMe][BUSY][nP] += diff;
        }
        else /* mark where busy time came from */
        {
            /* find (or append) the child record for callee */
            for (k = &data->kid; k->calleeID != callee; k = k->next)
                if (k->next == NULL)
                {
                    /* at the tail: reuse it if still unclaimed (NoID),
                     * otherwise extend the list with a fresh record */
                    if (k->calleeID != NoID)
                    {
                        k->next = GetChildData();
                        k = k->next;
                    }
                    k->calleeID = callee;
                    break;
                }
            k->busy.byEffP[eP] += diff;
        }
        SLNPRelease(&data->lock);
    }
}
/* Charge diff units of non-busy time (state = SPIN/BLOCKED/READY) to
 * record data under nominal-parallelism bucket nP.  first is TRUE when
 * data is for the top-of-stack entry, which also gets a JustMe charge.
 * The hit/iteration stamp ensures each record is charged at most once
 * per sampling pass even if it appears several times on the stack. */
static void AddNomSample (data, diff, nP, state, first)
ConcurrentData *data;
FLOAT diff;
int nP, state, first;
{
    ASSERT(data->type == ConcurrentDataType && (nP == 0 || nP == 1));
    /* BUG FIX: was (state >= 0 || state < NumStates), which is vacuously
     * true; the parallel assertion in AddSample uses &&. */
    ASSERT((state >= 0 && state < NumStates) && diff >= 0);
    if (data->hit[myHit] < iteration)
    {
        data->hit[myHit] = iteration;
        SLNPAcquire(&data->lock);
        if (first)
            data->nom.byNomP[JustMe][state][nP] += diff;
        data->nom.byNomP[MePlusKids][state][nP] += diff;
        SLNPRelease(&data->lock);
    }
}
/* Map an ID-stack entry to its ConcurrentData record.  A synch-object
 * ID is really the address of its SynchProfile (see TPushOnIdStack);
 * a plain procedure ID indexes the procData table. */
static ConcurrentData *id2data (id)
unsigned int id;
{
    SynchProfile *p;

    id &= AllOffMask;       /* strip the state bits */
    if (isSynchID(id))
    {
        p = (SynchProfile *)id;     /* the ID is the object's address */
        ASSERT(p->type == SynchProfileType);
        if (p->samples == NULL)     /* lazily allocate its sample space */
            p->samples = GetSampleSpace();
        return(&p->samples->data);
    }
    return(&procData[id]);
}
/* Take one sample of processor p: read the clock, snapshot the current
 * thread's ID stack, and charge the time elapsed since p's last sample
 * to the appropriate records.  Returns the clock value of this sample.
 * BUG FIX: the function is declared to return usclk_t but fell off the
 * end with no return statement; now returns the sample time. */
static usclk_t ProfileProc (p)
Processor *p;
{
    int i, stackDepth, eP, nP;
    usclk_t next;
    FLOAT diff;

    next = GETUSCLK();
    stackDepth = SampleStack(p->curThread, &eP, &nP);
    diff = ComputeDiff(next, p->lastSample);
    p->lastSample = next;
    if (stackDepth == 0 || isOverhead(tmpStack[0]))
        /* profiler overhead (or empty stack): charge the NoID record */
        AddSample(&procData[NoID], diff, eP, nP, TRUE, NoID);
    else if (isSpinning(tmpStack[0]))
        /* spinning: charge SPIN time to every entry on the stack */
        for (i = 0; i < stackDepth; i++)
            AddNomSample(id2data(tmpStack[i]), diff, nP, SPIN, i == 0);
    else
    {
        /* busy: the top of stack gets the first-hand charge ... */
        AddSample(id2data(tmpStack[0]), diff, eP, nP, TRUE, NoID);
        /* ... and each ancestor is charged with its callee identified
         * (tmpStack[0] is the innermost entry; see SampleStack) */
        for (i = stackDepth - 1; i > 0; i--)
            AddSample(id2data(tmpStack[i]), diff, eP, nP, FALSE,
                      (unsigned int)(tmpStack[i-1] & AllOffMask));
    }
    return(next);
}
/* Sample one synch object p: account for time its associated thread
 * (if any) spent blocked or ready, and accumulate time-weighted queue
 * length histograms.  Objects that are not ACTIVE are skipped. */
static void ProfileSynch (p)
SynchProfile *p;
{
    int i, n[NumNumbers];
    usclk_t next;
    FLOAT diff;
    register Thread *t;
    int stackDepth, eP, nP, type;

    if (p->status != ACTIVE)
        return;
    /* snapshot the queue-length counters before reading the clock */
    for (i = 0; i < NumNumbers; i++)
        n[i] = p->number[i];
    next = GETUSCLK();
    if (t = p->thread)      /* intentional assignment-in-condition */
        stackDepth = SampleStack(t, &eP, &nP);
    diff = ComputeDiff(next, p->lastSample);
    /* stackDepth/nP are valid only when t != NULL -- the && short-circuits */
    if (t && stackDepth != 0 && !isOverhead(tmpStack[0]) && !isBusy(tmpStack[0])
        && !isSpinning(tmpStack[0]))
    {
        if (isBlocked(tmpStack[0]))
            type = BLOCKED;
        else
            type = READY;
        /* charge blocked/ready time to every entry on the thread's stack */
        for (i = 0; i < stackDepth; i++)
            AddNomSample(id2data(tmpStack[i]), diff, nP, type, i == 0);
    }
    if (!p->samples)
        p->samples = GetSampleSpace();  /* lazily allocate sample space */
    /* weight each observed queue length by the elapsed time */
    for (i = 0; i < NumNumbers; i++)
    {
        n[i] = Bound(n[i], 0, MaxNominalParallelism - 1);
        p->samples->queue.length[i][n[i]] += diff;
    }
    p->lastSample = next;
}
/* Main loop of a sampling processor.  The first sampler in records the
 * start time for everyone; then each sampler repeatedly sweeps the
 * processor list, sampling any processor (and its registered synch
 * objects) whose profLock it can grab, until ProfileFinish sets
 * profileOver.  The last sampler to check out averages the totals and
 * dumps the profile. */
void ProfileExternal ()
{
    register int i;
    register Processor *p;
    register SynchProfile *s;

    SLNPAcquire(&startLock);
    if (!start)
    {
        /* first sampling processor in: start the clock for everyone */
        start = TRUE;
        startTime = GETUSCLK();
        for (i = 0; i < numProcessors; i++)
            processorList[i].lastSample = startTime;
    }
    SLNPRelease(&startLock);
    /* this sampler's index into the per-record hit[] stamp arrays */
    myHit = pP.myId - numProcessors;
    while (!profileOver)
    {
        for (i = 0; i < numProcessors && !profileOver; i++)
        {
            p = &processorList[i];
            /* skip a processor another sampler is already working on */
            if (SLNPTestAndGet(&p->profLock))
            {
                ProfileProc(p);
                for (s = p->synchList; s && !profileOver; s = s->next)
                    ProfileSynch(s);
                p->numSamples++;
                SLNPRelease(&p->profLock);
            }
        }
    }
    SLNPAcquire(&startLock);
    if (--endCount == 0) /* wait for everybody to check in */
    {
#ifndef DEBUG
        KillAll();
#endif
        for (i = 0; i < numProcessors; i++)
        {
            timeDiff += ComputeDiff(processorList[i].lastSample, startTime);
            numSamples += processorList[i].numSamples;
        }
        /* report per-processor averages of elapsed time and sample count */
        timeDiff /= numProcessors;
        numSamples /= numProcessors;
        DumpInfo();
#ifdef DEBUG
        KillAll();
#endif
    }
    SLNPRelease(&startLock);
    exit(0);
}
/* Fatal-error handler: mcount saw a processor ID beyond the table.
 * Report the error, stop all processors, and exit with failure. */
void TooHigh ()
{
    fputs("Fatal error: mcount() passed an out-of-bound processor ID.\n", stdout);
    fflush(stdout);
    KillAll();
    exit(1);
}
/* Fatal-error handler: a thread's profiling ID stack has no free slot
 * left.  Report the error, stop all processors, and exit with failure. */
void OutOfRoom ()
{
    fputs("Fatal error: profiler ID stack is out of room.\n", stdout);
    fflush(stdout);
    KillAll();
    exit(1);
}
/* Tell the sampling processors (looping in ProfileExternal) to stop. */
void ProfileFinish ()
{
    profileOver = TRUE;
}